import requests
from bs4 import BeautifulSoup
import pandas
# Scrape admission-score pages for school ids 1..99 and export them to Excel.
#
# Every record appended to `newarry` becomes one spreadsheet row. A school's
# header record (college / area / analog) and each of its score records are
# stored as separate rows, so columns that a record does not define are left
# empty in the resulting sheet.
newarry = []
user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'
# The HTTP header is spelled "User-Agent" (hyphen, not underscore).
headers = {'User-Agent': user_agent}
for i in range(1, 100):
    url = 'http://college.gaokao.com/school/tinfo/' + str(i) + '/result/15/1/'
    try:
        # `headers` must be passed by keyword: the second positional argument
        # of requests.get() is `params`, which would put the dict in the query
        # string instead of the request headers. The timeout keeps a single
        # stalled connection from hanging the whole run.
        res = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        print('skip {}: {}'.format(url, exc))
        continue
    soup = BeautifulSoup(res.text, 'html.parser')

    # find() returns None when the page has no summary box (e.g. an id with
    # no school behind it); skip such pages instead of crashing.
    items = soup.find(class_='btnFsxBox')
    fonts = items.select('font') if items is not None else []
    if len(fonts) < 3:
        print('skip {}: no school summary found'.format(url))
        continue
    newarry.append({'college': fonts[0].text,
                    'area': fonts[1].text,
                    'analog': fonts[2].text,
                    })

    for items1 in soup.find_all(class_=['szw', 'sz']):
        cells = items1.select('td')
        # Ignore rows that do not carry all six expected cells.
        if len(cells) < 6:
            continue
        newarry.append({'years': cells[0].text,
                        'lowest': cells[1].text,
                        'highest': cells[2].text,
                        'average': cells[3].text,
                        'enrollment': cells[4].text,
                        'batch': cells[5].text,
                        })
# Explicit `columns` fixes the column order of the exported sheet.
newsdf = pandas.DataFrame(newarry, columns=['college', 'area', 'analog', 'years', 'lowest', 'highest', 'average', 'enrollment', 'batch'])
print(newsdf)
newsdf.to_excel('录取线.xlsx')
